Procrastination & Dementia

Markov Model Proposal

Authors
Affiliations

Hamilton Institute, Maynooth University, Ireland

Department of Psychology, Maynooth University, Ireland

Department of Mathematics and Statistics, Maynooth University, Ireland

Joanna McHugh Power

Department of Psychology, Maynooth University, Ireland

Show the code
rm(list = ls())  # NOTE(review): clearing the global environment in a script is discouraged; consider removing

# Packages
suppressMessages(library(dplyr))  # data manipulation verbs (select, mutate, count, ...)
library(ggplot2)                  # plotting
library(ggalluvial)               # alluvial (flow) geoms used by plot_transitions()

# Functions -------------------------------------------------------------------
count_transitions <- function(data, years) {
# Counts cognitive-status transitions across the requested HRS waves.
# Recodes the numeric status codes (1, 2, 3) to descriptive labels
# ("Normal Cognition", "MCI", "Dementia"); NA becomes "Missing" and any
# unexpected value becomes "Other". Counts each unique status sequence,
# then reshapes to long format with one row per (sequence, wave).
# Arguments:
#   - data: Dataset containing ID, Total_p, and cogfunction<year> columns.
#   - years: Vector of years whose cogfunction columns should be counted.
# Returns:
#   - A long-format dataset with columns n (sequence count), ID (sequence
#     index), Wave ("HRS <year>"), and Status (factor incl. "Missing").

  # Build the column names for the requested waves
  status_cols <- paste0("cogfunction", years)

  # Recode numeric status codes to descriptive labels
  labelled <- data |>
    select(ID, Total_p, all_of(status_cols)) |>
    mutate(across(
      all_of(status_cols),
      ~ case_when(
        .x == 1 ~ "Normal Cognition",
        .x == 2 ~ "MCI",
        .x == 3 ~ "Dementia",
        is.na(.x) ~ "Missing",  # explicit label for missing values
        TRUE ~ "Other"          # any unexpected code
      )
    ))

  # Count each distinct status sequence, index it, and pivot to long format
  labelled |>
    count(across(all_of(status_cols))) |>
    mutate(ID = row_number()) |>
    tidyr::pivot_longer(
      cols = all_of(status_cols),
      names_to = "Wave",
      values_to = "Status"
    ) |>
    mutate(
      Status = factor(
        Status,
        levels = c("Normal Cognition", "MCI", "Dementia", "Missing")
      ),
      Wave = stringr::str_replace(Wave, "cogfunction", "HRS ")
    )
}

plot_cognitive_scores <- function(data, year, end_year = 2022) {
# Plots the distribution of cognitive scores (0-27) over a range of years.
# Converts the data from wide to long format and generates one histogram
# facet per wave.
# Arguments:
#   - data: The dataset containing cogtot27_imp<year> score columns.
#   - year: The starting year of the range to plot.
#   - end_year: The final year of the range (defaults to 2022, the most
#     recent HRS wave, preserving the previous hard-coded behaviour).
# Returns:
#   - A ggplot object showing the distribution of cognitive scores across waves.
  data |>
  select(ID, any_of(paste0("cogtot27_imp", year:end_year))) |>
  tidyr::pivot_longer(cols = !ID,
                      names_to = "Wave",
                      values_to = "Score") |>
  # Strip the variable prefix so the facet label is just the year
  mutate(Wave = as.double(stringr::str_replace(Wave, "cogtot27_imp", ""))) |>
  ggplot(aes(x = Score)) +
  geom_histogram(fill = "skyblue",
                 colour = "black",
                 alpha = 0.5) +
  scale_x_continuous(breaks = seq(0, 27, by = 3)) +
  labs(
    # " - " (was "- ") so the title matches the figure captions, e.g. "1996 - 2022"
    title = paste0("Distribution of cognitive scores ", year, " - ", end_year),
    x = "Cognitive Score", y = "") +
  facet_wrap( ~ Wave) +
  theme_minimal() +
  theme(
    plot.title = element_text(
      hjust = 0.5,
      size = 12,
      face = "bold",
      colour = "#2E2E2E"
    ),
    strip.text = element_text(
      size = 10,
      face = "bold",
      colour = "#2E2E2E"
    ),
    panel.grid = element_blank()
  )
}

plot_transitions <- function(data, size) {
# Visualizes cognitive status transitions over time as an alluvial plot,
# showing the flow of individuals between cognitive states across waves.
# Arguments:
#   - data: Long-format transition data with Wave, n, Status, and ID columns.
#   - size: Font size for the stratum labels.
# Returns:
#   - A ggplot object showing transitions between cognitive states over time.

  base_plot <- ggplot(
    data,
    aes(
      x = Wave,
      y = n,
      stratum = Status,
      fill = Status,
      alluvium = ID
    )
  ) +
    geom_stratum(width = 0.5, alpha = 0.5) +
    geom_flow(width = 0.5) +
    # Wrap long state names so labels fit inside the strata
    geom_text(
      stat = "stratum",
      aes(label = stringr::str_wrap(Status, width = 10)),
      size = size,
      hjust = 0.5,
      vjust = 0.5
    )

  base_plot +
    scale_fill_viridis_d(direction = -1) +
    labs(title = "Dementia transitions across time", x = "", y = "Frequency") +
    theme_minimal() +
    theme(
      plot.title = element_text(
        size = 14,
        hjust = 0.5,
        face = "bold"
      ),
      axis.text = element_text(size = 10),
      axis.title = element_text(size = 10),
      text = element_text(size = 10),
      panel.grid = element_blank()
    ) +
    ggeasy::easy_remove_legend()
}

extract_years <- function(data, years, scores = FALSE) {
# Extracts cognitive function data for specified years from a dataset.
# When scores = FALSE, converts numeric cognitive status codes (1, 2, 3)
# into descriptive labels ("Normal Cognition", "MCI", "Dementia") for easier
# interpretation; any other value (including NA) becomes NA.
# When scores = TRUE, returns the raw score and classification columns
# unmodified instead.
# Arguments:
#   - data: The input dataset containing cognitive function data.
#   - years: A vector of years for which data should be extracted.
#   - scores: Logical; if TRUE, also include the cogtot27_imp score columns
#     and skip the recoding of classification codes. Defaults to FALSE.
# Returns:
#   - A dataset with ID and cognitive status columns for the specified years
#     (plus score columns when scores = TRUE).

  # Create dynamic column names based on the years provided
  cogfunction_cols <- paste0("cogfunction", years)
  cogtotal_cols    <- paste0("cogtot27_imp", years)

  if (!scores) {
    data |>
      # Select only the ID column and cognitive function columns for the specified years
      dplyr::select(ID,
                    dplyr::any_of(cogfunction_cols)) |>
      dplyr::mutate(dplyr::across(!c(ID),
        ~ dplyr::case_when(
          .x == 1 ~ "Normal Cognition",
          .x == 2 ~ "MCI",
          .x == 3 ~ "Dementia",
          TRUE ~ NA_character_  # To handle missing/other cases
        )
      ))
  } else {
    # Return raw scores and classifications without recoding
    data |>
      dplyr::select(ID,
                    dplyr::any_of(cogtotal_cols),
                    dplyr::any_of(cogfunction_cols))
  }
}

create_transitions <- function(data, absorbing = FALSE){
# Reshapes data from wide to long format to track cognitive status transitions over time.
# Calculates the next wave's cognitive status for each individual (via lead())
# and creates a "transition" column of the form "<Status> to <next status>".
# Optionally treats "Dementia" as an absorbing state, meaning once an individual
# is classified with dementia, their status cannot change in subsequent waves.
# Arguments:
#   - data: The dataset containing ID and cogfunction<year> status columns.
#   - absorbing: A logical flag indicating whether "Dementia" should be treated
#   as an absorbing state.
# Returns:
#   - A dataset with transition information, including current and next wave statuses.
# NOTE(review): Wave is a factor built from the year suffix; arrange() sorts it
# lexically, which is chronologically correct only for 4-digit years.

  # Reshape the data from wide to long format to track cognitive status over waves
  data <- data |>
    dplyr::select(ID, dplyr::starts_with("cogfunction")) |>
    tidyr::pivot_longer(cols = !ID,
                        names_to = "Wave",
                        values_to = "Status") |>
    dplyr::mutate(Wave = as.factor(stringr::str_replace(Wave, "cogfunction", ""))) |>
    # Arrange by ID and Wave to prepare for transition calculation
    dplyr::arrange(ID, Wave) |>
    dplyr::group_by(ID) |>
    # Get the next wave's cognitive status for each person
    dplyr::mutate(next_wave_status = dplyr::lead(Status)) |>
    dplyr::ungroup()
    
    # We can optionally specify dementia as an absorbing state
    # Once an individual is classified with dementia they cannot be classified
    # with anything else 
    # NOTE(review): within this mutate(), Status is overwritten first, so the
    # cumany() in the next_wave_status line sees the already-absorbed Status.
    # Rows where lead() produced NA after a dementia onset are also forced to
    # "Dementia" here — confirm this is the intended handling of missing waves.
    # The transition column computed here is recomputed below, so this
    # assignment is effectively redundant.
    if(absorbing == TRUE) {
    data <- data |>
      dplyr::group_by(ID) |>
      dplyr::mutate(
        Status = ifelse(cumany(Status == "Dementia"), "Dementia", Status),
        next_wave_status = ifelse(cumany(Status == "Dementia"), "Dementia", next_wave_status),
        transition = paste(Status, next_wave_status, sep = " to ")
      )
    }
  
    # Filter out rows where either the current or next status is missing
    data <- data |>
      dplyr::group_by(ID) |>
      dplyr::filter(!is.na(Status), !is.na(next_wave_status)) |>
      dplyr::ungroup() |>
      # Create a new column representing the transition from one status to the next
      dplyr::mutate(transition = paste(Status, next_wave_status, sep = " to "))
    
    return(data)
}

calculate_probabilties <- function(data) {
# Calculates the proportion of each cognitive status transition in the dataset.
# Counts the occurrences of each transition and computes their relative
# probabilities (each transition count divided by the total number of
# observed transitions, NOT row-normalised per "from" state — see normalise()).
# NOTE(review): function name contains a typo ("probabilties") but is kept
# as-is because callers throughout the document use this spelling.
# Arguments:
#   - data: The dataset containing a "transition" column of the form
#     "<from> to <to>" (as produced by create_transitions()).
# Returns:
#   - A dataset with columns from, to, n, and prop.

  # Calculate the proportion of each transition by dividing by the total count
  data |>
    dplyr::count(transition) |>
    dplyr::mutate(prop = n / sum(n)) |>
    tidyr::separate(transition, into = c("from", "to"), sep = " to ")
}

transition_matrix <- function(data) {
# Constructs a transition matrix from the calculated transition probabilities.
# The matrix represents the probability of moving from one cognitive state
# to another; pairs absent from `data` stay 0.
# Arguments:
#   - data: A data frame with "from", "to", and "prop" columns (as produced
#     by calculate_probabilties()). States must be one of the three states
#     below; an unknown state raises a subscript error, as before.
# Returns:
#   - A 3x3 transition matrix with rows representing "from" states and
#     columns representing "to" states.

  # Define an empty matrix over the three cognitive states
  states <- c("Normal Cognition", "MCI", "Dementia")
  probs <- matrix(
    0,
    nrow = length(states),
    ncol = length(states),
    dimnames = list(from_state = states, to_state = states))

  # Vectorised fill: index the matrix with a two-column character matrix of
  # (from, to) name pairs instead of a scalar loop. This also behaves
  # correctly for zero-row input (the old `1:nrow(data)` loop did not), and
  # as.character() guards against factor columns being silently used as
  # integer indices.
  probs[cbind(as.character(data$from), as.character(data$to))] <- data$prop

  return(probs)
}

plot_matrix <- function(data, year, ts = FALSE) {
# Visualizes a transition matrix as a heatmap with probability labels
# (rounded to 3 digits).
# NOTE(review): callers in this document pass t(matrix). After transposing,
# the rows assigned to "from_state" below are actually the original "to"
# states, which is why labs() maps x to "To" and y to "From". Passing an
# untransposed matrix would silently swap the axes — confirm before reuse.
# Arguments:
#   - data: The (transposed) transition matrix to plot; rows/columns must be
#     in the order Normal Cognition, MCI, Dementia.
#   - year: The year or time period shown in the title.
#   - ts: A logical flag; TRUE uses a single-year title, FALSE a
#     "(<year>-2022)" range title.
# Returns:
#   - A ggplot object representing the transition probabilities as a heatmap.

  if(ts == TRUE) {
    title <- paste0("Transition Probabilities Across Cognitive States - ", year)
  } else {
    title <- paste0("Transition Probabilities Across Cognitive States (", year, "-2022)")
  }

  # Reshaping data for plotting: melt the matrix to long
  # (from_state, to_state, probability) form. reshape2 is superseded by
  # tidyr::pivot_longer but kept here for identical behaviour.
  data <- data |>
    as.data.frame(row.names = FALSE) |>
    dplyr::mutate(from_state = c("Normal Cognition", "MCI", "Dementia")) |>
    reshape2::melt(id.vars = "from_state", variable.name = "to_state", value.name = "probability")

  # Plotting probabilities
  data |>
    ggplot(aes(x = from_state, y = to_state, fill = probability)) +
    geom_tile() +
    scale_fill_viridis_c(alpha = 0.8) +
    labs(title = title,
         x = "To", y = "From", fill = "Prob") +
    geom_text(aes(label = round(probability, digits = 3)), size = 5) +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
          axis.title = element_text(size = 10),
          axis.text = element_text(size = 8),
          panel.grid = element_blank()
    )
}

normalise <- function(x) {
# Row-normalises a matrix: divides every entry by its row total so that
# each row sums to 1. Dimnames are preserved.
# Arguments:
#   - x: The input numeric matrix to normalize.
# Returns:
#   - A normalized matrix where each row sums to 1.
  sweep(x, 1, rowSums(x), FUN = "/")
}

create_transition_matrix <- function(wave_data) {
# Aggregates transition probabilities and constructs a transition matrix for a specific wave.
# Converts the data into a wide format suitable for matrix operations.
# Arguments:
#   - wave_data: A dataset with "from", "to", and "prop" columns for a
#     specific wave.
# Returns:
#   - A transition matrix for the specified wave (rows = "from" states,
#     columns = "to" states).
# NOTE(review): unlike transition_matrix(), row/column order here follows
# the order states appear in the data, not a fixed canonical order, and
# states with no observed transitions are absent rather than zero-filled —
# confirm downstream code does not assume a fixed 3x3 layout.

  # Aggregate by 'from' and 'to' states to sum transition probabilities
  wave_data_agg <- wave_data |>
    dplyr::group_by(from, to) |>
    dplyr::summarise(transition_prob = sum(prop), .groups = "drop")
  
  # Spread "to" states into columns (unseen pairs filled with 0) and convert
  # the "from" column into row names so the result is a plain numeric matrix
  wave_matrix <- wave_data_agg |>
    tidyr::pivot_wider(names_from = to, values_from = transition_prob, values_fill = 0) |>
    tibble::column_to_rownames("from") |>
    as.matrix()
  
  return(wave_matrix)
}

reshape_matrix <- function(matrix, wave) {
# Reshapes a transition matrix into a long format for easier analysis and
# visualization. Adds a "Wave" column to identify the time period associated
# with the matrix.
# Arguments:
#   - matrix: The transition matrix to reshape (row names = "from" states,
#     column names = "to" states).
#   - wave: The wave or time period associated with the matrix; stored
#     verbatim in the Wave column.
# Returns:
#   - A long-format dataset with "from", "to", "transition_prob", and "Wave"
#     columns (one row per from/to pair).

  matrix |>
    as.data.frame() |>
    # Promote matrix row names to a regular "from" column before pivoting
    tibble::rownames_to_column(var = "from") |>
    tidyr::pivot_longer(cols = -from, names_to = "to", values_to = "transition_prob") |>
    dplyr::mutate(Wave = wave)
}

plot_transition_wave <- function(data, wave) {
# Plots transition probabilities for a specific wave as a heatmap.
# Filters the data to only the transitions among the three main cognitive
# states (dropping e.g. "Missing") and labels each tile with its probability.
# Arguments:
#   - data: The dataset containing from, to, transition_prob, and Wave columns.
#   - wave: The wave or time period to plot.
# Returns:
#   - A ggplot object representing the transition probabilities for the
#   specified wave.

  # All 9 from->to pairs among the three main states, generated with
  # expand.grid instead of a hand-typed 9-row table (less typo-prone).
  # stringsAsFactors = FALSE keeps the join keys as character.
  states <- c("Normal Cognition", "MCI", "Dementia")
  filter_data <- expand.grid(
    from = states,
    to = states,
    stringsAsFactors = FALSE
  )

  data |>
    dplyr::semi_join(filter_data, by = c("from", "to")) |>
    dplyr::filter(Wave == wave) |>
    ggplot(aes(x = to, y = from, fill = transition_prob)) +
    geom_tile() +
    # Round labels to 3 digits for readability, consistent with plot_matrix()
    geom_text(aes(label = round(transition_prob, digits = 3)), size = 5) +
    scale_fill_viridis_c(alpha = 0.8) +
    labs(title = paste0("Transition Probabilities Across Cognitive States (", wave, ")"),
         x = "To", y = "From", fill = "Prob") +
    theme_light() +
    theme(
      plot.title = element_text(hjust = 0.5, size = 12, face = "bold"),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8),
      panel.grid = element_blank(),
      panel.border = element_blank()
    )
}

process_data <- function(data, absorbing = FALSE){
# Reshapes wide score/classification columns (cogtot27_imp<year>,
# cogfunction<year>) into one row per (ID, Year) with Score and Class
# columns, recoding the numeric classification codes to labels.
# Arguments:
#   - data: Dataset containing cogtot27_imp and cogfunction columns for
#     the 2016-2022 waves.
#   - absorbing: Logical; if TRUE, dementia is treated as an absorbing state
#     and scores after onset are capped at 6.
# Returns:
#   - A long-format dataset with Year (factor), Score, and Class (factor)
#     columns.
  data <- data |>
    tidyr::pivot_longer(
      cols = matches("cogtot27_imp|cogfunction"),
      names_to = c("Measure", "Year"),
      names_pattern = "(cogtot27_imp|cogfunction)(\\d+)",
      values_to = "Value"
    ) |>
    mutate(
      Measure = ifelse(Measure == "cogtot27_imp", "Score", "Class"),
      # Only the 2016-2022 waves are expected; other years would become NA
      Year = factor(Year, levels = c("2016", "2018", "2020", "2022"))
    ) |>
    tidyr::pivot_wider(names_from = "Measure", values_from = "Value") |>
    mutate(
      Class = case_when(
        Class == 1 ~ "Normal Cognition",
        Class == 2 ~ "MCI",
        Class == 3 ~ "Dementia",
        TRUE ~ NA_character_
      )
    )
  
  # We can optionally specify dementia as an absorbing state
  # Once an individual is classified with dementia they cannot be classified
  # with anything else 
  # Additionally, we change their score to be 6 to reflect the highest possible
  # value you can get while still being classified with dementia
  # NOTE(review): within this mutate(), Class is updated before the Score
  # clamp, so the clamp sees the absorbed Class. cumany() propagates NA for
  # rows with missing Class before the first dementia onset — confirm this
  # NA handling is intended.
  if(absorbing == TRUE){
    data <- data |>
      group_by(ID) |>
      mutate(
        Class = ifelse(cumany(Class == "Dementia"), "Dementia", Class),
        Score = ifelse(Class == "Dementia" & Score > 6, 6, Score)) |>
      ungroup()
  }
  
  # Fix the factor order so plots/tables list states from least to most severe
  data <- data |>
    mutate(Class = factor(Class, levels = c("Normal Cognition", "MCI", "Dementia")))
  
  return(data)
}

backtrack_age <- function(data) {
# Derives each participant's age at every wave from their 2022 age by
# subtracting the number of years between the wave and 2022
# (2016 -> 6, 2018 -> 4, 2020 -> 2, 2022 -> 0).
# Arguments:
#   - data: Dataset with an Age_2022 column and a Year column.
# Returns:
#   - The dataset with Age_2022 renamed to Age and adjusted per wave.
  data |>
    rename(Age = Age_2022) |>
    mutate(
      years_back = case_when(
        Year == "2016" ~ 6,
        Year == "2018" ~ 4,
        Year == "2020" ~ 2,
        TRUE ~ 0
      ),
      Age = Age - years_back
    ) |>
    select(-years_back)
}

# Paths -----------------------------------------------------------------------
# Relative path to the processed data directory; resolved against the project
# root via here::here() when the data is read below
path_data <- "./01__Data/02__Processed/"

Overview

The Health and Retirement Study (HRS) collects an extensive array of data related to cognitive health, physical well-being, economic status, and psychosocial factors. Among the information gathered, specific variables are used to assess and classify individuals into distinct categories of cognitive functioning. These classifications help to better understand the progression of cognitive decline, enabling researchers to track trends and identify factors that may influence cognitive health over time.

Cognitive Function Categories

  • Normal Cognition: Individuals in this category show no significant signs of cognitive impairment and are able to function independently in daily activities.
  • Mild Cognitive Impairment (MCI): This stage reflects a slight but noticeable decline in cognitive abilities, such as memory or thinking skills, that goes beyond what would be expected with normal aging but does not yet interfere significantly with daily life.
  • Dementia: This classification is marked by more severe cognitive deficits, impacting memory, reasoning, and the ability to perform everyday tasks. Dementia encompasses various neurodegenerative conditions, with Alzheimer’s disease being the most common.

By tracking transitions between these categories, researchers can gain insights into the factors that contribute to cognitive decline and identify potential interventions to promote healthy aging.

Langa-Weir Classifications

For previous waves of HRS data (1995 - 2020) there is a researcher contributed dataset of dementia classifications (Langa, 2023). Researchers have used this dataset to study the trajectories of cognitive aging, dementia risk, and related health outcomes in older adults. However, with the recent release of the 2022 HRS data, these classifications have yet to be updated.

LWC2022 Package

In order to address this we developed the LWC2022 package (Monaghan et al., 2024) to replicate the methodology of Langa (2023). This package automates the classification process, ensuring consistency with previous waves and providing researchers with an up-to-date resource for studying cognitive decline and dementia across all available HRS waves. Figure 1 illustrates the LWC workflow.

Figure 1: LWC Workflow

Dementia Dataset

Our updated dataset contains 13 columns of cognitive classifications spanning from 1996 to 2022. These columns reflect dementia status across each wave, using the same criteria and structure as the original Langa-Weir dataset, now extended to include the most recent 2022 wave.

Show the code
# Load the processed HRS dataset from the data directory defined above
data <- readxl::read_xlsx(here::here(path_data, "data.xlsx"))

# Count cognitive-status transitions across all waves (1996-2022)
dementia_data <- data |>
  count_transitions(years = seq(1996, 2022, by = 2))

glimpse(dementia_data)
Rows: 4,466
Columns: 4
$ n      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ ID     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, …
$ Wave   <chr> "HRS 1996", "HRS 1998", "HRS 2000", "HRS 2002", "HRS 2004", "HR…
$ Status <fct> MCI, Dementia, MCI, MCI, MCI, Dementia, Dementia, MCI, MCI, Dem…

Missing data

Since the participant pool is drawn from the 2020 wave of the HRS data, any analysis that looks back at previous waves will inevitably encounter missing data. Not all participants were included in earlier waves, leading to gaps in the data. Figure 2 displays the frequency of missing cognitive test data across each HRS wave.

Show the code
# Dynamic colours
# 14 x-axis labels (waves 1996-2022): highlight 2020 — the wave participants
# were recruited from — in red, all others in dark grey
colours <- c(rep("#2e2e2e", times = 12), "red", "#2e2e2e")

data |>
  select(any_of(paste0("cogtot27_imp", 1996:2022))) |>
  # Strip the variable prefix so axis labels show only the year
  rename_with( ~ stringr::str_replace(., "cogtot27_imp", ""), starts_with("cogtot27")) |>
  visdat::vis_dat(palette = "cb_safe") +
  labs(title = "Missing data across HRS Waves (1996 - 2022)",
       subtitle = "Total Cognitive Scores") +
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold", colour = "#2e2e2e"),
    plot.subtitle = element_text(hjust = 0.5, size = 10, colour = "#2e2e2e"),
    axis.text.x = element_text(colour = colours)) +
  ggeasy::easy_remove_legend() +
  ggeasy::easy_x_axis_labels_size(size = 9)
Figure 2: Occurrence of missing data across HRS waves (participants were gathered from the 2020 HRS wave [in red])

The HRS had its most recent participant intake in 2016, which explains the notable decline in missing data occurrences from that point onward. As new participants were not added after 2016, the dataset becomes more complete in subsequent waves, with fewer missing values.

Given this shift, we will conduct our analysis on both:

  • A full dataset (1996 - 2022)
  • A reduced dataset (2016 - 2022).

Exploratory Data Analysis

Cognitive Test Scores

Initially, we will plot a distribution of the cognitive test scores (ranging from 0 - 27) across time for all HRS participants.

Show the code
plot_cognitive_scores(data = data, year = 1996)
Figure 3: Cognitive test scores (1996 - 2022)

Classification proportion per year

Figure 4 shows the proportion of dementia classifications per HRS wave. The variable Missing represents HRS respondents who had not yet been interviewed by the HRS. Since the analysis focuses on participants included in the 2020 HRS wave, any retroactive analysis of prior waves may result in missing data for certain individuals.

Show the code
# Plotting proportions --------------------------------------------------------
# Stacked (filled) bar chart of the share of each cognitive status per wave.
# Switched %>% to the native |> pipe for consistency with the rest of the
# document (behaviour is identical here).
dementia_data |>
  ggplot(aes(x = Wave, fill = Status)) +
  geom_bar(position = "fill", 
           alpha = 0.5,
           width = 0.5,
           colour = "black") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Proportion of Cognitive Status Classifications (1996 - 2022)", 
       x = "", y = "Percentage") +
  scale_fill_viridis_d(direction = -1) +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 10),
        text = element_text(size = 10)) +
  ggeasy::easy_move_legend("bottom") +
  ggeasy::easy_remove_legend_title()
Figure 4: Proportion of dementia classifications (1996 - 2022)

Dementia transitions per year

Figure 5 is an alluvial graph illustrating the transitions in cognitive classifications from one HRS wave to the next.

Show the code
# Alluvial plot of wave-to-wave transitions for the full 1996-2022 data
dementia_data |>
  plot_transitions(size = 2.5)
Figure 5: Dementia transitions (1996 - 2022)

Cognitive Test Scores

Initially, we will plot a distribution of the cognitive test scores (ranging from 0 - 27) across time for all HRS participants.

Show the code
plot_cognitive_scores(data = data, year = 2016)
Figure 6: Cognitive test scores (2016 - 2022)

Classification proportion per year

Figure 7 shows the proportion of dementia classifications from HRS 2016 - 2022. The variable Missing represents HRS respondents who had not yet been interviewed by the HRS. Since the analysis focuses on participants included in the 2020 HRS wave, any retroactive analysis of prior waves may result in missing data for certain individuals.

Show the code
# Plotting proportions --------------------------------------------------------
# Recompute transition counts on the reduced 2016-2022 window, then plot the
# share of each cognitive status per wave as a filled bar chart
data |>
  count_transitions(years = seq(2016, 2022, by = 2)) |>
  ggplot(aes(x = Wave, fill = Status)) +
  geom_bar(position = "fill", alpha = 0.5, colour = "black") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Proportion of Cognitive Status Classifications (2016 - 2022)", 
       x = "", y = "Percentage") +
  scale_fill_viridis_d(direction = -1) +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 10),
        text = element_text(size = 10)) +
  ggeasy::easy_move_legend("bottom") +
  ggeasy::easy_remove_legend_title()
Figure 7: Proportion of dementia classifications per year

Dementia transitions per year

Figure 8 is an alluvial graph illustrating the transitions in cognitive classifications from one HRS wave to the next.

Show the code
# Alluvial plot of wave-to-wave transitions for the reduced 2016-2022 window
data |>
  count_transitions(years = seq(2016, 2022, by = 2)) |>
  plot_transitions(size = 2.5)
Figure 8: Dementia transitions (2016 - 2022)

Markov Modelling

In this section, we apply Markov modelling to analyze the transitions between cognitive states (Normal Cognition, Mild Cognitive Impairment [MCI], and Dementia) using longitudinal data from the Health and Retirement Study (HRS) from 1996 to 2022. By modeling these transitions, we aim to understand the probabilities of moving between different cognitive states over time.

Data preparation

The first step is to prepare the transition dataset. We start by transforming the data to create a record of transitions from one cognitive state to another between consecutive waves (1996 - 2022).

Once the transitions are identified, we calculate the probabilities of each state-to-state transition. These probabilities are computed by counting the occurrences of each transition and dividing by the total number of transitions.

Show the code
# Full-sample (1996-2022) transition probabilities, with dementia treated as
# an absorbing state
tran_prob_full <- data |>
  extract_years(years = seq(1996, 2022, by = 2)) |>
  create_transitions(absorbing = TRUE) |>
  calculate_probabilties()

# Display the probabilities as a formatted gt table (rounded to 3 digits)
tran_prob_full |>
  mutate(prop = round(prop, digits = 3)) |>
  head(n = 9) |>
  gt::gt() |>
  gtExtras::gt_theme_538() |>
  gt::cols_align(align = "center") |>
  gt::tab_header(title = "Overall Transition Probabilties (1996 - 2022)") |>
  gt::tab_options(table.width = gt::pct(100)) |>
  gt::opt_align_table_header(align = "center")
Overall Transition Probabilties (1996 - 2022)
from to n prop
Dementia Dementia 360 0.141
MCI Dementia 11 0.004
MCI MCI 71 0.028
MCI Normal Cognition 108 0.042
Normal Cognition Dementia 9 0.004
Normal Cognition MCI 143 0.056
Normal Cognition Normal Cognition 1848 0.725

Creating transition matrix

Next, we organize the transition probabilities into a matrix format, where rows represent the “from” states and columns represent the “to” states. The matrix entries contain the transition probabilities for each state-to-state pair

Show the code
# Build the 3x3 matrix and row-normalise so each "from" state's outgoing
# probabilities sum to 1
tran_matrix_full <- tran_prob_full |>
  transition_matrix() |>
  normalise()

tran_matrix_full
                  to_state
from_state         Normal Cognition       MCI   Dementia
  Normal Cognition        0.9240000 0.0715000 0.00450000
  MCI                     0.5684211 0.3736842 0.05789474
  Dementia                0.0000000 0.0000000 1.00000000

This code creates a 3 × 3 matrix (for the three states: Normal Cognition, MCI, and Dementia) and populates it with the transition probabilities.

Visualising the matrix

To make the transition matrix more interpretable, we visualize the probabilities using a heat map (Figure 9)

Show the code
# Transpose before plotting: plot_matrix() expects the transposed matrix so
# that its x = "To" / y = "From" axis labels line up correctly
plot_matrix(data = t(tran_matrix_full), year = 1996)
Figure 9: Transition Matrix

Data preparation

The first step is to prepare the transition dataset. We start by transforming the data to create a record of transitions from one cognitive state to another between consecutive waves (2016 - 2022).

Once the transitions are identified, we calculate the probabilities of each state-to-state transition. These probabilities are computed by counting the occurrences of each transition and dividing by the total number of transitions.

Show the code
# Reduced-window (2016-2022) transition probabilities, with dementia treated
# as an absorbing state
tran_prob_reduced <- data |>
  extract_years(years = seq(2016, 2022, by = 2)) |>
  create_transitions(absorbing = TRUE) |>
  calculate_probabilties()

# Display the probabilities as a formatted gt table (rounded to 3 digits)
tran_prob_reduced |>
  mutate(prop = round(prop, digits = 3)) |>
  head(n = 9) |>
  gt::gt() |>
  gtExtras::gt_theme_538() |>
  gt::cols_align(align = "center") |>
  gt::tab_header(title = "Overall Transition Probabilties (2016 - 2022)") |>
  gt::tab_options(table.width = gt::pct(100)) |>
  gt::opt_align_table_header(align = "center")
Overall Transition Probabilties (2016 - 2022)
from to n prop
Dementia Dementia 151 0.056
MCI Dementia 31 0.011
MCI MCI 122 0.045
MCI Normal Cognition 179 0.066
Normal Cognition Dementia 20 0.007
Normal Cognition MCI 219 0.081
Normal Cognition Normal Cognition 1984 0.733

Creating transition matrix

Next, we organize the transition probabilities into a matrix format, where rows represent the “from” states and columns represent the “to” states. The matrix entries contain the transition probabilities for each state-to-state pair. Additionally, we normalise the matrix so that each row sums to one.

Show the code
# Reduced-window transition matrix, row-normalised so each "from" state's
# outgoing probabilities sum to one.
tran_matrix_reduced <- normalise(transition_matrix(tran_prob_reduced))

tran_matrix_reduced
                  to_state
from_state         Normal Cognition        MCI    Dementia
  Normal Cognition        0.8924876 0.09851552 0.008996851
  MCI                     0.5391566 0.36746988 0.093373494
  Dementia                0.0000000 0.00000000 1.000000000

This code creates a 3 × 3 matrix (for the three states: Normal Cognition, MCI, and Dementia) and populates it with the transition probabilities.

Visualising the matrix

To make the transition matrix more interpretable, we visualize the probabilities using a heat map (Figure 10)

Show the code
# Heat map of the reduced-window matrix; transposed to match the orientation
# plot_matrix() appears to expect (see the earlier full-period plot).
plot_matrix(data = t(tran_matrix_reduced), year = 2016)
Figure 10: Transition Matrix

Per Wave Probabilities

Instead of looking at the overall probabilities of transitioning between cognitive states, we now examine how these transition probabilities change across multiple waves of data collection. This allows us to understand how cognitive states (Normal Cognition, Mild Cognitive Impairment (MCI), and Dementia) evolve over time within the study population.

Creating per wave transition matrices

We begin by creating transition matrices for each wave of data. Each matrix will represent the probabilities of transitioning from one cognitive state to another within a given wave.

Show the code
create_transition_matrix <- function(wave_data) {
  # Build a square transition matrix for one wave of data.
  # Sums transition probabilities over duplicate (from, to) pairs, then
  # spreads the "to" states into columns (absent pairs filled with 0) and
  # uses the "from" states as row names.
  #
  # Arguments:
  #   - wave_data: data frame with columns `from`, `to`, and `prop`.
  # Returns:
  #   - A numeric matrix with "from" states as rows and "to" states as columns.
  totals <- wave_data |>
    dplyr::group_by(from, to) |>
    dplyr::summarise(transition_prob = sum(prop), .groups = "drop")

  wide <- tidyr::pivot_wider(
    totals,
    names_from = to,
    values_from = transition_prob,
    values_fill = 0
  )

  as.matrix(tibble::column_to_rownames(wide, "from"))
}

# Calculating transition matrix
# Per-wave transition matrices for 1998-2022: extract the study waves, build
# wave-to-wave transitions (Dementia absorbing), compute transition
# proportions within each wave, then collapse each wave's rows into a matrix.
tran_matrices_reduced <- data |>
  extract_years(years = seq(1998, 2022, by = 2)) |>
  create_transitions(absorbing = TRUE) |>
  group_by(Wave) |>
  calculate_probabilties() |>
  group_map( ~ create_transition_matrix(.x), .keep = TRUE)

# Drop the 13th matrix — presumably the final (2022) wave, which has no
# subsequent wave to transition into; TODO confirm the index against the
# Wave grouping.
tran_matrices_reduced[[13]] <- NULL

# Inspect the first three wave matrices.
head(tran_matrices_reduced, n = 3)
[[1]]
                    Dementia        MCI Normal Cognition
Dementia         0.011152416 0.00000000       0.00000000
MCI              0.007434944 0.01115242       0.02973978
Normal Cognition 0.000000000 0.06319703       0.87732342

[[2]]
                    Dementia        MCI Normal Cognition
Dementia         0.018796992 0.00000000       0.00000000
MCI              0.000000000 0.03383459       0.04135338
Normal Cognition 0.003759398 0.06015038       0.84210526

[[3]]
                    Dementia        MCI Normal Cognition
Dementia         0.022727273 0.00000000       0.00000000
MCI              0.003787879 0.03030303       0.06060606
Normal Cognition 0.000000000 0.03030303       0.85227273

Reshaping matrix

Once we have the transition matrices, we need to reshape them into a long format for easier plotting and further analysis. In the long format, each row represents a single transition, along with the corresponding transition probability and the wave of data collection.

Show the code
# Reshape one transition matrix into long format and tag it with its wave.
# Each output row is a single (from, to) transition with its probability.
reshape_matrix <- function(matrix, wave) {
  long <- matrix |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "from") |>
    tidyr::pivot_longer(
      cols = -from,
      names_to = "to",
      values_to = "transition_prob"
    )

  mutate(long, Wave = wave)
}

# Put every matrix into the canonical state order (rows and columns), then
# re-normalise each row.
tran_matrices_reduced <- lapply(tran_matrices_reduced, function(mat) {
  ord <- c("Normal Cognition", "MCI", "Dementia")
  normalise(mat[ord, ord])
})

# Creating a singular dataframe of transition probabilities --------------------
wave_names <- seq(1998, 2020, by = 2)

# Reshape every wave matrix to long format, tag it with its wave year, and
# stack the results into one data frame; round for display and fix the state
# ordering for plotting.
transition_probabilties_long <-
  purrr::map2_dfr(tran_matrices_reduced, wave_names, reshape_matrix) |>
  mutate(
    transition_prob = round(transition_prob, digits = 3),
    from = factor(from, levels = c("Normal Cognition", "MCI", "Dementia"))
  )

# Interactive, filterable table of the long-format probabilities
DT::datatable(
  transition_probabilties_long,
  filter = "top",
  colnames = c("Prob" = "transition_prob"),
  options = list(pageLength = 6, autoWidth = TRUE)
)

Plotting transition probabilities

Finally, we plot the transition probabilities as heatmaps to visualize how they evolve across different waves. We will create an animated GIF that cycles through the waves, showing the changes in transition probabilities over time.

Show the code
plot_transition_wave <- function(data, wave) {
  # Heat map of transition probabilities for a single wave.
  #
  # Arguments:
  #   - data: long-format data frame with `from`, `to`, `transition_prob`,
  #     and `Wave` columns.
  #   - wave: the wave year to plot.
  # Returns:
  #   - A ggplot object (tile heat map with probability labels).

  # The nine canonical state-to-state pairs; rows not matching one of these
  # are dropped before plotting.
  states <- c("Normal Cognition", "MCI", "Dementia")
  filter_data <- data.frame(
    from = rep(states, each = 3),
    to = rep(states, times = 3)
  )

  wave_data <- data |>
    semi_join(filter_data, by = c("from", "to")) |>
    filter(Wave == wave)

  ggplot(wave_data, aes(x = to, y = from, fill = transition_prob)) +
    geom_tile() +
    geom_text(aes(label = transition_prob), size = 5) +
    scale_fill_viridis_c(alpha = 0.8) +
    labs(
      title = paste0("Transition Probabilities Across Cognitive States (", wave, ")"),
      x = "To",
      y = "From",
      fill = "Prob"
    ) +
    theme_light() +
    theme(
      plot.title = element_text(
        hjust = 0.5,
        size = 12,
        face = "bold"
      ),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8),
      panel.grid = element_blank(),
      panel.border = element_blank()
    )
}

# Render one transition heat map per wave year. print() is required for
# ggplot objects to display when created inside a loop.
for (wave in wave_names) {
  print(plot_transition_wave(data = transition_probabilties_long, wave = wave))
}

Cognitive Scores

In this section, we incorporate participants’ cognitive scores (from 2016 - 2022) into our analysis. The cognitive scores are visualized in Figure 6, which provides a summary of the data.

Show the code
# Data processing --------------------------------------------------------------

# Education codes in `ed_level` map positionally onto the display labels in
# `ed_label`; both feed the factor() call below.
ed_level <- c(
  "No_degree", "GED", "High_school", "College_2_years", 
  "College_4_years", "Masters", "PhD", "Other")

ed_label <- c(
  "No degree", "GED", "High school", "College (2 years)", 
  "College (4 years)", "Masters", "PhD", "Other")

# Variables to add to datafile: recode the numeric Education variable into a
# labelled factor. Codes outside {0..6, 9} fall through case_when() as NA.
adding <- data |>
  dplyr::select(ID, Age_2022, Education, Total_p) |>
  dplyr::mutate(
    Education = dplyr::case_when(
      Education == 0 ~ "No_degree",
      Education == 1 ~ "GED",
      Education == 2 ~ "High_school",
      Education == 3 ~ "College_2_years",
      Education == 4 ~ "College_4_years",
      Education == 5 ~ "Masters",
      Education == 6 ~ "PhD",
      Education == 9 ~ "Other"
    ),
    Education = factor(Education, levels = ed_level, labels = ed_label)
  )
 
# Build the cognition-score dataset for 2016-2022:
#   - keep only participants with a non-missing Score in every wave,
#   - require a procrastination total (Total_p),
#   - blank Total_p on all but the 2022 rows (presumably it was only
#     measured at the 2022 wave — confirm against the data dictionary).
cog_data <- data |>
  extract_years(years = seq(2016, 2022, by = 2), scores = TRUE) |>
  process_data(absorbing = TRUE) |>
  dplyr::inner_join(adding, by = "ID") |>
  backtrack_age() |>
  dplyr::group_by(ID) |>
  dplyr::filter(all(!is.na(Score))) |>
  dplyr::ungroup() |>
  dplyr::filter(!is.na(Total_p)) |>
  dplyr::mutate(Total_p = ifelse(Year == 2022, Total_p, NA)) 


# Outputting table: first 12 rows of the processed cognition data, with Age
# and Education moved next to Year and Total_p renamed for display.
cog_data |>
  dplyr::relocate(c(Age, Education), .after = Year) |>
  dplyr::rename(Procrastination = Total_p) |>
  head(n = 12) |>
  gt::gt() |>
  gtExtras::gt_theme_538() |>
  gt::cols_align(align = "center") |>
  gt::tab_header(title = "Cognition Scores (2016 - 2022)") |>
  gt::tab_options(table.width = gt::pct(100)) |>
  gt::opt_align_table_header(align = "center")
Cognition Scores (2016 - 2022)
ID Year Age Education Score Class Procrastination
1 2016 72 College (4 years) 20 Normal Cognition NA
1 2018 74 College (4 years) 16 Normal Cognition NA
1 2020 76 College (4 years) 14 Normal Cognition NA
1 2022 78 College (4 years) 18 Normal Cognition 22
2 2016 75 Masters 19 Normal Cognition NA
2 2018 77 Masters 16 Normal Cognition NA
2 2020 79 Masters 17 Normal Cognition NA
2 2022 81 Masters 18 Normal Cognition 41
3 2016 83 No degree 4 Dementia NA
3 2018 85 No degree 4 Dementia NA
3 2020 87 No degree 2 Dementia NA
3 2022 89 No degree 3 Dementia 60

Visualisations

To explore the relationships between cognitive scores and other variables, we create a series of visualizations. These plots help us identify trends, correlations, and potential areas for further investigation.

The first visualization (Figure 11) examines the relationship between cognitive scores and age. A scatter plot is used to display the data, with points jittered to reduce overlap.

Show the code
# Age vs cognitive score, coloured by cognitive class and faceted by wave
# year; points are jittered slightly to reduce overplotting.
ggplot(cog_data, aes(x = Age, y = Score, colour = Class)) +
  geom_jitter(size = 2, width = 0.2, height = 0.2) +
  scale_x_continuous(breaks = seq(0, 100, by = 10)) +
  scale_y_continuous(breaks = seq(0, 27, by = 5)) +
  facet_wrap(~ Year) +
  theme_bw() +
  ggeasy::easy_remove_legend_title() +
  ggeasy::easy_move_legend(to = "bottom")
Figure 11: Scatter plot of age vs cognitive scores

The second visualization (Figure 12) explores the relationship between cognitive scores and education levels. A bar plot is used to display the mean cognitive score for each education level, faceted by year.

Show the code
# Mean cognitive score within each education level and wave year, shown as
# horizontal bars (one panel per year).
ggplot(
  cog_data |>
    group_by(Education, Year) |>
    summarise(Score = mean(Score), .groups = "keep"),
  aes(x = Education, y = Score, fill = forcats::fct_rev(Education))
) +
  geom_col(width = 0.75, alpha = 0.6) +
  scale_y_continuous(breaks = seq(0, 27, by = 3)) +
  labs(title = "Mean Cognitive Score by Education Level",
       subtitle = "Across Time", x = "", y = "Total Cognitive Score") +
  facet_wrap(~ Year) +
  theme_minimal() +
  coord_flip() +
  ggeasy::easy_remove_legend() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 15, face = "bold"),
    axis.text.y = element_text(size = 12),
    strip.text = element_text(size = 10, face = "bold"))
Figure 12: Mean cognitive score by education level

The final visualization (Figure 13) investigates the relationship between cognitive scores and procrastination tendencies. A scatter plot is used to display the data for the year 2022.

Show the code
# Procrastination vs cognitive score, restricted to the 2022 wave (the only
# year with Total_p populated).
ggplot(filter(cog_data, Year == 2022), aes(x = Total_p, y = Score)) +
  geom_jitter(width = 0.25, size = 1.5, alpha = 0.5) +
  labs(
    title = "Procrastination vs. Cognitive Classification Score",
    x = "Total Procrastination", 
    y = "Cognitive Classification Score") +
  scale_x_continuous(breaks = seq(0, 60, by = 10)) +
  scale_y_continuous(breaks = seq(0, 27, by = 5)) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10))
Figure 13: Scatter plot of procrastination vs cognitive scores

References

Langa, K. M. (2023). Langa-weir classification of cognitive function (1995-2020). Survey Research Center, Institute for Social Research, University of Michigan. Https://Hrsdata. Isr. Umich. Edu/Sites/Default/Files/Documentation/Data-Descriptions/1680034270/Data_Description_Langa_Weir_ Classifications2020. Pdf.
Monaghan, C., de Andrade Moral, R., & McHugh Power, J. (2024). lwc2022: Langa-weir classification of cognitive function for 2022 HRS data. https://github.com/C-Monaghan/lwc2022
 

An analysis by Cormac Monaghan

This page was built with R and Quarto